Install packages either directly in R or individually from bioconductor then load as below
library(Rmisc)
library(tidyverse)
library(QFeatures)
library(limma)
library(patchwork)
library(readxl)
library(clusterProfiler)
library(org.Mm.eg.db)
library(wesanderson)
library(ggExtra)
library(ggrepel)
library(plotly)
library(scales)
library(ggVennDiagram)
library(ggpubr)
Package Versions
print(sessionInfo())
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.6.8
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Australia/Sydney
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggpubr_0.6.0 ggVennDiagram_1.5.2
## [3] scales_1.3.0 plotly_4.10.4
## [5] ggrepel_0.9.3 ggExtra_0.10.1
## [7] wesanderson_0.3.7 org.Mm.eg.db_3.17.0
## [9] AnnotationDbi_1.62.2 clusterProfiler_4.8.2
## [11] readxl_1.4.3 patchwork_1.2.0.9000
## [13] limma_3.56.2 QFeatures_1.10.0
## [15] MultiAssayExperiment_1.26.0 SummarizedExperiment_1.30.2
## [17] Biobase_2.60.0 GenomicRanges_1.52.0
## [19] GenomeInfoDb_1.36.2 IRanges_2.34.1
## [21] S4Vectors_0.38.1 BiocGenerics_0.46.0
## [23] MatrixGenerics_1.12.3 matrixStats_1.0.0
## [25] lubridate_1.9.2 forcats_1.0.0
## [27] stringr_1.5.0 dplyr_1.1.3
## [29] purrr_1.0.2 readr_2.1.4
## [31] tidyr_1.3.0 tibble_3.2.1
## [33] ggplot2_3.5.1 tidyverse_2.0.0
## [35] Rmisc_1.5.1 plyr_1.8.8
## [37] lattice_0.21-8
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.15.0 jsonlite_1.8.7
## [4] magrittr_2.0.3 farver_2.1.1 rmarkdown_2.24
## [7] zlibbioc_1.46.0 vctrs_0.6.3 memoise_2.0.1
## [10] RCurl_1.98-1.12 ggtree_3.8.2 rstatix_0.7.2
## [13] htmltools_0.5.6 S4Arrays_1.2.0 broom_1.0.5
## [16] cellranger_1.1.0 gridGraphics_0.5-1 sass_0.4.7
## [19] bslib_0.5.1 htmlwidgets_1.6.2 cachem_1.0.8
## [22] igraph_1.5.1 mime_0.12 lifecycle_1.0.3
## [25] pkgconfig_2.0.3 gson_0.1.0 Matrix_1.6-1
## [28] R6_2.5.1 fastmap_1.1.1 shiny_1.7.5
## [31] GenomeInfoDbData_1.2.10 clue_0.3-64 digest_0.6.33
## [34] aplot_0.2.0 enrichplot_1.20.0 colorspace_2.1-0
## [37] RSQLite_2.3.1 fansi_1.0.4 timechange_0.2.0
## [40] httr_1.4.7 polyclip_1.10-4 abind_1.4-5
## [43] compiler_4.3.1 bit64_4.0.5 withr_2.5.0
## [46] downloader_0.4 backports_1.4.1 BiocParallel_1.34.2
## [49] carData_3.0-5 viridis_0.6.4 DBI_1.1.3
## [52] ggforce_0.4.1 ggsignif_0.6.4 MASS_7.3-60
## [55] DelayedArray_0.26.7 HDO.db_0.99.1 tools_4.3.1
## [58] scatterpie_0.2.1 ape_5.7-1 httpuv_1.6.11
## [61] glue_1.6.2 promises_1.2.1 nlme_3.1-163
## [64] GOSemSim_2.26.1 shadowtext_0.1.2 grid_4.3.1
## [67] cluster_2.1.4 reshape2_1.4.4 fgsea_1.26.0
## [70] generics_0.1.3 gtable_0.3.4 tzdb_0.4.0
## [73] data.table_1.14.8 hms_1.1.3 car_3.1-2
## [76] tidygraph_1.2.3 utf8_1.2.3 XVector_0.40.0
## [79] pillar_1.9.0 yulab.utils_0.0.9 later_1.3.1
## [82] splines_4.3.1 tweenr_2.0.2 treeio_1.24.3
## [85] bit_4.0.5 tidyselect_1.2.0 GO.db_3.17.0
## [88] miniUI_0.1.1.1 Biostrings_2.68.1 knitr_1.43
## [91] gridExtra_2.3 ProtGenerics_1.32.0 xfun_0.40
## [94] graphlayouts_1.0.0 stringi_1.7.12 lazyeval_0.2.2
## [97] ggfun_0.1.2 yaml_2.3.7 evaluate_0.21
## [100] codetools_0.2-19 ggraph_2.1.0 MsCoreUtils_1.12.0
## [103] qvalue_2.32.0 ggplotify_0.1.2 cli_3.6.1
## [106] xtable_1.8-4 munsell_0.5.0 jquerylib_0.1.4
## [109] Rcpp_1.0.11 png_0.1-8 parallel_4.3.1
## [112] ellipsis_0.3.2 blob_1.2.4 DOSE_3.26.1
## [115] AnnotationFilter_1.24.0 bitops_1.0-7 tidytree_0.4.5
## [118] viridisLite_0.4.2 crayon_1.5.2 rlang_1.1.1
## [121] cowplot_1.1.1 fastmatch_1.1-4 KEGGREST_1.40.0
Colour scheme for plots
diet_colors <- wes_palette("Moonrise3", 3, type = "discrete")
Read in proteomics data and metadata
proteinGroups <- read.delim("/Users/lsma0320/Documents/GitHub/Lipogenesis-Proteomics/proteinGroups.txt")
lipogenesis_metadata <- read_excel("/Users/lsma0320/Documents/GitHub/Lipogenesis-Proteomics/lipogenesis_metadata.xlsx",sheet = 3)
Subset data to filter out contaminants, reverse and only ID by site
sub1 <- subset(proteinGroups, Potential.contaminant =="")
sub2 <- subset(sub1, Reverse =="")
sub3 <- subset(sub2, Only.identified.by.site =="")
Subset data to retain proteins identified using 2 or more unique peptides
FilterALL <- subset(sub3, Unique.peptides >= 2)
Select Protein IDs and LFQ intensities in proteinGroups data
LFQdata1 <- subset(FilterALL, select = c(1,7, 541:614))
colnames(LFQdata1) <- sub(".*4WEEKS_","",colnames(LFQdata1))
colnames(LFQdata1) <- sub(".*30WEEKS_","",colnames(LFQdata1))
# Convert 0 values to NAs
LFQdata1[LFQdata1 == 0] <- NA
# Make a unique gene.names column
LFQdata1 <- mutate(LFQdata1,unique.gene.name = sub(";.*", "",LFQdata1$Gene.names)) %>% relocate(unique.gene.name,.after = Gene.names)
Wrangle metadata
lipogenesis_metadata$Animal_ID <- as.character(lipogenesis_metadata$Animal_ID)
lipogenesis_metadata$liver_lipogenesis <- as.numeric(lipogenesis_metadata$liver_lipogenesis)
## Warning: NAs introduced by coercion
dim(lipogenesis_metadata)
## [1] 75 12
# Filter metadata for samples ran on mass spec.
lipogenesis_metadata <- filter(lipogenesis_metadata, Animal_ID %in% colnames(LFQdata1[,4:77])) %>% mutate(sub_group = paste(Diet,time_on_diet,sep = "_"))
dim(lipogenesis_metadata)
## [1] 74 13
Transform dataframe to matrix
mat <- as.matrix(column_to_rownames(remove_rownames(LFQdata1[,c(1,4:77)]),"Protein.IDs"))
# subset matrix to be the same sample order as metadata
mat <- mat[,lipogenesis_metadata$Animal_ID]
identical(colnames(mat),lipogenesis_metadata$Animal_ID)
## [1] TRUE
Correlation between samples to identify samples with technical issues
cor_matrix <- cor(na.omit(mat))
heatmap(cor(na.omit(mat)))
sample_cor <- data.frame(colMeans(cor_matrix))
high_variable_samples <- filter(sample_cor,sample_cor$colMeans.cor_matrix.<0.9)
print(high_variable_samples)
## colMeans.cor_matrix.
## 20846 0.8271025
## 16699 0.7037851
## 16728 0.8605940
## 16730 0.6693383
## 16739 0.7324234
## 16740 0.6809605
barplot(colMeans(cor_matrix))
Make a summarized experiment object
protein_info <- column_to_rownames(remove_rownames(LFQdata1[,1:3]),"Protein.IDs")
sumExp <- SummarizedExperiment(assays = mat, colData=column_to_rownames(remove_rownames(lipogenesis_metadata),"Animal_ID"), protein_info)
nNA(sumExp)
## $nNA
## DataFrame with 1 row and 2 columns
## nNA pNA
## <integer> <numeric>
## 1 132070 32.3379
##
## $nNArows
## DataFrame with 5519 rows and 3 columns
## name nNA pNA
## <character> <integer> <numeric>
## 1 A0A075B5M2... 73 98.64865
## 2 A0A075B5M7... 49 66.21622
## 3 A0A075B5P3... 3 4.05405
## 4 A0A075B5P4... 0 0.00000
## 5 A0A075B5P5... 13 17.56757
## ... ... ... ...
## 5515 S4R2A9 41 55.4054
## 5516 S4R2K0 66 89.1892
## 5517 S4R2K9;A0A... 0 0.0000
## 5518 V9GX38 73 98.6486
## 5519 V9GX76;E9Q... 0 0.0000
##
## $nNAcols
## DataFrame with 74 rows and 3 columns
## name nNA pNA
## <character> <integer> <numeric>
## 1 20818 1511 27.3781
## 2 20819 1565 28.3566
## 3 20820 1615 29.2625
## 4 20821 1874 33.9554
## 5 20822 1492 27.0339
## ... ... ... ...
## 70 16738 1946 35.2600
## 71 16739 2982 54.0315
## 72 16740 2905 52.6363
## 73 16741 1953 35.3868
## 74 16742 1932 35.0063
Log transformation
sumExp <- QFeatures::logTransform(sumExp)
boxplot(assay(sumExp))
Normalisation, here I use “diff.median”
sumExp <- QFeatures::normalize(sumExp,method="diff.median")
boxplot(assay(sumExp))
limma::plotDensities(assay(sumExp))
Filter out samples with low sample to sample CV
sumExp <- sumExp[,!(rownames(sumExp@colData) %in% rownames(high_variable_samples))]
cor_matrix_without_outliers <- cor(na.omit(assay(sumExp)))
heatmap(cor_matrix_without_outliers)
barplot(colMeans(cor_matrix_without_outliers))
limma::plotDensities(assay(sumExp))
Subset 4 week data
four_week_data <- sumExp[,sumExp$time_on_diet == "four_weeks"]
dim(four_week_data)
## [1] 5519 44
Filter proteins with high missing values and identify values missing at random (MAR) and values missing not at random by looking at the distribution of NAs between groups.
missing_tidy_4_weeks <- as_tibble(assay(four_week_data), rownames = "proteins") %>% pivot_longer(cols=-"proteins",names_to = "Animal_ID",values_to = "LogIntensity") %>% inner_join(lipogenesis_metadata,by="Animal_ID")
Missing_table_4_weeks <- missing_tidy_4_weeks %>%
dplyr::group_by(Diet,proteins) %>%
summarise(sum_not_na = sum(!is.na(LogIntensity))) %>%
pivot_wider(names_from = Diet,values_from = sum_not_na) %>%
mutate(p_not_na_chow = chow/length(which(four_week_data@colData$Diet == "chow"))*100) %>%
mutate(p_not_na_starch = starch/length(which(four_week_data@colData$Diet == "starch"))*100) %>%
mutate(p_not_na_fat = fat/length(which(four_week_data@colData$Diet == "fat"))*100) %>%
rowwise() %>%
mutate(largest_dif=max(abs(diff(c(p_not_na_chow,p_not_na_starch,p_not_na_fat)))))
## `summarise()` has grouped output by 'Diet'. You can override using the
## `.groups` argument.
hist(Missing_table_4_weeks$largest_dif)
# Filtering out proteins that were detected in less than 8 samples in each diet unless dtermined MNAR
Missing_table_4_weeks <- mutate(Missing_table_4_weeks,MAR = if_else(largest_dif >= 60,FALSE,TRUE))%>%
mutate(Keep=if_else(MAR==FALSE|(chow >= 8 & starch >= 8 & fat >= 8),"YES","NO"))
table(Missing_table_4_weeks$Keep)
##
## NO YES
## 1768 3751
protein_table_4_weeks <- full_join(rownames_to_column(protein_info,"proteins"),Missing_table_4_weeks,by = "proteins")
filtered_sumExp_4_weeks <- SummarizedExperiment(assays= assay(four_week_data), colData=four_week_data@colData, rowData=protein_table_4_weeks)
filtered_sumExp_4_weeks <- filtered_sumExp_4_weeks[filtered_sumExp_4_weeks@elementMetadata$Keep == "YES",]
Imputation, low values are imputed in for samples that are not MAR (MNAR) using a left-censored imputation (QRILC) so that proteins that have a pattern of MNAR can be statistically analysed (Only 7 proteins are MNAR)
imputed_sumExp_4_weeks <- QFeatures::impute(filtered_sumExp_4_weeks, method = "mixed",
randna = filtered_sumExp_4_weeks@elementMetadata$MAR,
mar = "none",
mnar = "QRILC")
## Loading required namespace: imputeLCMD
## Imputing along margin 1 (features/rows).
MDS to visualize shape of data
mds_function <- function(dataset){
mds <- plotMDS(assay(dataset), plot = FALSE,top=500)
mds_df <- data.frame(Animal_ID = colnames(mds$distance.matrix.squared),X = mds$x, Y = mds$y,Dim1 = mds$eigen.vectors[,1], Dim2 = mds$eigen.vectors[,2], Dim3 = mds$eigen.vectors[,3],
Diet=dataset@colData$Diet)
MDS_plotter <- function(assay_data,colored_by){
df1 <- na.omit(assay_data)
graph1 <- ggplot(df1, aes_string(x="Dim1", y="Dim2", color=colored_by, text = "Animal_ID")) + geom_point(size=3)
graph2 <- ggplot(df1, aes_string(x="Dim1", y="Dim3", color=colored_by, text = "Animal_ID")) + geom_point(size=3)
graph3 <- ggplot(df1, aes_string(x="Dim2", y="Dim3", color=colored_by, text = "Animal_ID")) + geom_point(size=3)
return(graph1+graph2+graph3+plot_layout(guides = 'collect'))}
graph <- MDS_plotter(mds_df,"Diet")
return(graph)}
mds_function(imputed_sumExp_4_weeks)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Several outliers identified on MDS, which correspond with samples that had contamination from muscle therefore they are excluded from the dataset
outliers_4_week <- c("20842","21167","20833","20821","21182")
imputed_sumExp_4_weeks_no_outliers <- imputed_sumExp_4_weeks[,!colnames(imputed_sumExp_4_weeks) %in% outliers_4_week]
mds_function(imputed_sumExp_4_weeks_no_outliers)
Making a tidy table for plotting in ggplot
Meta_tidy_4_weeks <- as.data.frame(imputed_sumExp_4_weeks_no_outliers@colData) %>% rownames_to_column(var = "Animal_ID")
protein_tibble_4_weeks <- as_tibble(assay(imputed_sumExp_4_weeks_no_outliers), rownames = "proteins") %>% full_join(protein_table_4_weeks[,1:3],by="proteins")
tidy_protein_table_4_weeks <- pivot_longer(protein_tibble_4_weeks,cols= -c(proteins,Gene.names,unique.gene.name),names_to = "Animal_ID",values_to = "LogIntensity")
tidy_protein_table_4_weeks <- inner_join(Meta_tidy_4_weeks,tidy_protein_table_4_weeks,by="Animal_ID")
tidy_protein_table_4_weeks$Diet <- factor(tidy_protein_table_4_weeks$Diet, levels = c("chow","starch","fat"))
Limma analysis for continuous variables, lipogenesis and triglyceride content (4 weeks only)
limma_continuous_phenotype <- function(phenotype){
pheno_meta <- as_tibble(imputed_sumExp_4_weeks_no_outliers@colData,rownames = "Animal_ID") %>% drop_na(phenotype) %>% filter(phenotype > 0)
pheno_m <- assay(imputed_sumExp_4_weeks_no_outliers)[,pheno_meta$Animal_ID]
pheno <- log2(pull(pheno_meta, phenotype))
pheno_diet <- factor(pheno_meta$Diet)
pheno_cohort <- factor(pheno_meta$Cohort)
pheno_model <- model.matrix(~pheno)
is.fullrank(pheno_model)
fit_pheno <- lmFit(pheno_m, pheno_model)
fitCont_pheno <- eBayes(fit_pheno,trend = T,robust = T)
plotSA(fitCont_pheno)
pheno_res <- topTable(fitCont_pheno, coef = 2, number = Inf, sort.by = "none")
pheno_res <- left_join(rownames_to_column(pheno_res,var="proteins"),protein_table_4_weeks[,1:3],by="proteins")
return(pheno_res)}
lipo_res <- limma_continuous_phenotype("liver_lipogenesis")
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(phenotype)
##
## # Now:
## data %>% select(all_of(phenotype))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Unknown or uninitialised column: `Cohort`.
count(lipo_res$adj.P.Val<0.05&lipo_res$logFC>0)
## [1] 77
count(lipo_res$adj.P.Val<0.05&lipo_res$logFC<0)
## [1] 55
lipo_res_positive <- filter(lipo_res,logFC>0 & adj.P.Val < 0.05)
trig_res <- limma_continuous_phenotype("liver_triglyceride_content")
## Warning: Unknown or uninitialised column: `Cohort`.
trig_res_positive <- filter(trig_res,logFC>0 & adj.P.Val < 0.05)
count(trig_res$adj.P.Val<0.05&trig_res$logFC>0)
## [1] 120
count(trig_res$adj.P.Val<0.05&trig_res$logFC<0)
## [1] 82
Volcano plots for the continuous measurements
volcanoplotter_pheno <- function(results_table,x_limits,label_n,gradient_limits){
top_genes <- slice_min(results_table,order_by = P.Value, n =label_n)
rt <- mutate(results_table, Stat_Sig = ifelse(adj.P.Val < 0.05,T,F)) %>% mutate(plot = ifelse(proteins %in% top_genes$proteins,T,F))
graph <- ggplot(rt, aes(x=logFC, y=-log10(P.Value),text=Gene.names))+geom_point(aes(color=logFC,alpha=Stat_Sig))+ geom_text_repel(
data=rt %>% filter(plot==T),
aes(label=unique.gene.name),show.legend = F,color="black",position = position_nudge(y = 0.2))+
scale_x_continuous(limits=x_limits)+
scale_color_gradientn(colours=c("blue","grey","red"),limits = gradient_limits)+
theme_classic()+theme(panel.border = element_rect(colour = "black", fill=NA, linewidth=1.5))+
scale_alpha_manual(values=c(0.1,1))
return(graph)}
VP1 <- volcanoplotter_pheno(lipo_res,c(-1.6,1.6),44,c(-1.6,1.6))
VP2 <- volcanoplotter_pheno(trig_res,c(-1,1),45,c(-1,1))
VP1
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
VP2
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: ggrepel: 29 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Pathway analysis (Gene over representation analysis using clusterprofiler)
GO_function <- function(results_sheet,GO_type,direction, adj_p_val_cutoff){
upregulated_proteins <- filter(results_sheet, adj.P.Val < adj_p_val_cutoff & logFC > 0) %>% filter(unique.gene.name != "")
upregulated_proteins = bitr(upregulated_proteins$unique.gene.name, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Mm.eg.db)
downregulated_proteins <- filter(results_sheet, adj.P.Val < adj_p_val_cutoff & logFC < 0) %>% filter(unique.gene.name != "")
downregulated_proteins = bitr(downregulated_proteins$unique.gene.name, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Mm.eg.db)
background = bitr(lipo_res$unique.gene.name, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Mm.eg.db)
if(direction == "up"){dat = upregulated_proteins}
else if(direction == "down"){dat = downregulated_proteins}
GO <- enrichGO(gene = dat$ENTREZID,OrgDb = org.Mm.eg.db,ont = GO_type,universe = background$ENTREZID,readable = T,keyType = "ENTREZID")
GO <- simplify(GO,cutoff=0.7)
return(GO)}
upregulated_lipo_GO <- GO_function(lipo_res,"BP","up",0.01)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(upregulated_proteins$unique.gene.name, fromType = "SYMBOL", :
## 7.89% of input gene IDs are fail to map...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(downregulated_proteins$unique.gene.name, fromType = "SYMBOL", :
## 6.67% of input gene IDs are fail to map...
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(lipo_res$unique.gene.name, fromType = "SYMBOL", toType =
## c("ENSEMBL", : 5.53% of input gene IDs are fail to map...
downregulated_lipo_GO <- GO_function(lipo_res,"BP","down",0.01)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(upregulated_proteins$unique.gene.name, fromType = "SYMBOL", :
## 7.89% of input gene IDs are fail to map...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(downregulated_proteins$unique.gene.name, fromType = "SYMBOL", :
## 6.67% of input gene IDs are fail to map...
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(lipo_res$unique.gene.name, fromType = "SYMBOL", toType =
## c("ENSEMBL", : 5.53% of input gene IDs are fail to map...
clusterProfiler::dotplot(upregulated_lipo_GO)
upregulated_trig_GO <- GO_function(trig_res,"BP","up",0.01)
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(downregulated_proteins$unique.gene.name, fromType = "SYMBOL", :
## 3.57% of input gene IDs are fail to map...
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(lipo_res$unique.gene.name, fromType = "SYMBOL", toType =
## c("ENSEMBL", : 5.53% of input gene IDs are fail to map...
downregulated_trig_GO <- GO_function(trig_res,"BP","down",0.01)
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(downregulated_proteins$unique.gene.name, fromType = "SYMBOL", :
## 3.57% of input gene IDs are fail to map...
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(lipo_res$unique.gene.name, fromType = "SYMBOL", toType =
## c("ENSEMBL", : 5.53% of input gene IDs are fail to map...
clusterProfiler::dotplot(upregulated_trig_GO)
Plots of linear regression of protein abundance vs continuous phenotype
lr_protein_name <- function(protein_names,pheno,ncols,line){
dat <- rename(tidy_protein_table_4_weeks, "pheno" = pheno)
graph <- filter(dat, proteins %in% protein_names) %>% ggplot(aes(x = log2(pheno), y = LogIntensity)) +
geom_smooth(color="black",method="lm",linetype = line) +
geom_point(aes(color=Diet),alpha=0.7) +
facet_wrap(~unique.gene.name, scales ="free_y",ncol=ncols)+
scale_color_manual(values=diet_colors)+
scale_x_continuous(breaks = c(3,4,5))+
xlab("Log Lipogenesis")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=1))
return(graph)}
top_5_lipo_positive <- slice_min(filter(lipo_res,logFC>0),order_by = P.Value,n = 5)
top_5_lipo_negative <- slice_min(filter(lipo_res,logFC<0),order_by = P.Value,n = 5)
lr_protein_name(top_5_lipo_positive$proteins,"liver_lipogenesis",5,"solid")/
lr_protein_name(top_5_lipo_negative$proteins,"liver_lipogenesis",5,"solid")
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(pheno)
##
## # Now:
## data %>% select(all_of(pheno))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
top_3_trig_positive <- slice_min(filter(trig_res,logFC>0),order_by = P.Value,n = 3)
top_3_lipo_positive <- slice_min(filter(lipo_res,logFC>0),order_by = P.Value,n = 3)
lr_protein_name(top_3_trig_positive$proteins,"liver_triglyceride_content",3,"solid")/
lr_protein_name(top_3_lipo_positive$proteins,"liver_triglyceride_content",3,"blank")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Limma comparing different diets
limma_diets <- function(exp_obj){
diet <- factor(exp_obj@colData$Diet)
model <- model.matrix(~0+diet)
is.fullrank(model)
head(model)
contrasts <- makeContrasts(fat_v_chow = (dietfat - dietchow)
, starch_v_chow = (dietstarch - dietchow)
, fat_v_starch = (dietfat - dietstarch)
,levels = model)
fit <- lmFit(assay(exp_obj), model)
plotSA(fit)
diet_res <- lapply(colnames(contrasts), function(cont){
fitCont <- contrasts.fit(fit, contrasts = contrasts[,cont])
fitCont <- eBayes(fitCont,trend = T,robust = T)
plotSA(fitCont)
dat <- topTable(fitCont, number = Inf, sort.by = "none")
left_join(rownames_to_column(dat,var="proteins"),protein_table_4_weeks[,1:3],by="proteins")
})
names(diet_res) <- colnames(contrasts)
return(diet_res)}
diet_res <- limma_diets(imputed_sumExp_4_weeks_no_outliers)
hist(diet_res$fat_v_chow$P.Value)
count(diet_res$fat_v_chow$adj.P.Val < 0.05)
## [1] 498
count(diet_res$fat_v_chow$adj.P.Val < 0.05 & diet_res$fat_v_chow$logFC>0)
## [1] 245
count(diet_res$fat_v_chow$adj.P.Val < 0.05 & diet_res$fat_v_chow$logFC<0)
## [1] 253
hist(diet_res$starch_v_chow$P.Value)
count(diet_res$starch_v_chow$adj.P.Val < 0.05 & diet_res$starch_v_chow$logFC>0)
## [1] 186
count(diet_res$starch_v_chow$adj.P.Val < 0.05 & diet_res$starch_v_chow$logFC<0)
## [1] 277
hist(diet_res$fat_v_starch$P.Value)
count(diet_res$fat_v_starch$adj.P.Val < 0.05 & diet_res$fat_v_starch$logFC>0)
## [1] 125
count(diet_res$fat_v_starch$adj.P.Val < 0.05 & diet_res$fat_v_starch$logFC<0)
## [1] 75
Limma diets corrected for triglyceride content (to look at the chow specific proteome)
limma_diet_corrected <- function(exp_obj){
meta <- as_tibble(exp_obj@colData,rownames = "Animal_ID") %>% drop_na(liver_lipogenesis) %>% filter(liver_lipogenesis > 0)
exp <- exp_obj[,meta$Animal_ID]
diet <- factor(exp@colData$Diet)
trig <- exp$liver_triglyceride_content
#lipo <- exp$liver_lipogenesis
model <- model.matrix(~0+diet+trig)
is.fullrank(model)
head(model)
contrasts <- makeContrasts(fat_v_chow = (dietfat - dietchow)
, starch_v_chow = (dietstarch - dietchow)
, fat_v_starch = (dietfat - dietstarch)
, starch_and_fat_v_chow = ((dietfat + dietstarch)/2-dietchow)
,levels = model)
fit <- lmFit(assay(exp), model)
plotSA(fit)
diet_res <- lapply(colnames(contrasts), function(cont){
fitCont <- contrasts.fit(fit, contrasts = contrasts[,cont])
fitCont <- eBayes(fitCont,trend = T,robust = T)
plotSA(fitCont)
dat <- topTable(fitCont, number = Inf, sort.by = "none")
left_join(rownames_to_column(dat,var="proteins"),protein_table_4_weeks[,1:3],by="proteins")
})
names(diet_res) <- colnames(contrasts)
return(diet_res)}
diet_res_corrected <- limma_diet_corrected(imputed_sumExp_4_weeks_no_outliers)
hist(diet_res_corrected$starch_and_fat_v_chow$P.Value)
count(diet_res_corrected$starch_and_fat_v_chow$adj.P.Val < 0.05)
## [1] 311
Volcano plots between diets
volcanoplotter <- function(results_table,x_limits,colors,n_proteins){
res <- results_table
top_genes <- slice_min(res,order_by = P.Value, n =n_proteins)
rt <- mutate(res, Stat_Sig = ifelse(adj.P.Val < 0.01,T,F)) %>% mutate(plot = ifelse(proteins %in% top_genes$proteins,T,F)) %>%
mutate(direction = ifelse(logFC > 0,"up","down"))
graph <- ggplot(rt, aes(x=logFC, y=-log10(P.Value), color=direction, text=unique.gene.name))+geom_point(aes(alpha=Stat_Sig))+ geom_text_repel(
data=rt %>% filter(plot==T),
aes(label=unique.gene.name),show.legend = F,color="black",position = position_nudge(y = 0.2))+
scale_x_continuous(limits=x_limits)+
theme_classic()+theme(panel.border = element_rect(colour = "black", fill=NA, linewidth=1.5))+
scale_color_manual(values = colors)+
scale_alpha_manual(values=c(0.1,1))
return(graph)}
V_FvC <- volcanoplotter(diet_res$fat_v_chow,c(-3,3),c(diet_colors[1],diet_colors[3]),100)
V_SvC <- volcanoplotter(diet_res$starch_v_chow,c(-3,3),c(diet_colors[1],diet_colors[2]),100)
V_FvS <- volcanoplotter(diet_res$fat_v_starch,c(-3,3),c(diet_colors[2],diet_colors[3]),100)
V_FvC/V_SvC/V_FvS
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 91 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 93 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 89 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Venn diagram of triglyceride corrected data
sig_lists <- list(fat_v_chow = filter(diet_res_corrected$fat_v_chow,adj.P.Val<0.05)[,1], starch_v_chow = filter(diet_res_corrected$starch_v_chow,adj.P.Val<0.05)[,1], fat_v_starch = filter(diet_res_corrected$fat_v_starch,adj.P.Val<0.05)[,1])
chow_proteins <- intersect(sig_lists$fat_v_chow,sig_lists$starch_v_chow)
ggVennDiagram(sig_lists,label_alpha = 0)
chow_proteins <- setdiff(intersect(sig_lists$fat_v_chow,sig_lists$starch_v_chow),sig_lists$fat_v_starch)
chow_proteins <- filter(diet_res_corrected$starch_and_fat_v_chow,proteins %in% chow_proteins)
#Venn diagram of lipo proteins vs trig proteins
lipo_vs_trig_proteins <- intersect(lipo_res_positive$proteins,trig_res_positive$proteins)
GO analysis of chow specific proteins
GO_function_chow <- function(results_sheet,GO_type){
res <- filter(results_sheet, logFC<0)
proteins = bitr(res$unique.gene.name, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Mm.eg.db)
background = bitr(diet_res_corrected$starch_and_fat_v_chow$unique.gene.name, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Mm.eg.db)
GO <- enrichGO(gene = proteins$ENTREZID,OrgDb = org.Mm.eg.db,ont = GO_type,universe = background$ENTREZID,readable = T,keyType = "ENTREZID")
GO <- simplify(GO,0.5)
return(GO)}
chow_GSE <- GO_function_chow(chow_proteins,"BP")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(res$unique.gene.name, fromType = "SYMBOL", toType =
## c("ENSEMBL", : 4.44% of input gene IDs are fail to map...
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(diet_res_corrected$starch_and_fat_v_chow$unique.gene.name, :
## 5.53% of input gene IDs are fail to map...
chow_GSE@result %>% mutate(Description = fct_reorder(Description, -pvalue))%>% ggplot(aes(x=-log10(p.adjust),y=Description,fill=Count),color="black")+geom_col()+theme_classic()
Gene Set Enrichment Analysis for diets
GSE_function <- function(results_table,GO_type){
gene.df = bitr(results_table$unique.gene.name, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Mm.eg.db)
gene.df <- distinct(gene.df,SYMBOL,.keep_all = T)
gene.df <- inner_join(gene.df,results_table,by = c("SYMBOL"="unique.gene.name")) %>% mutate("rank_metric"=-log10(P.Value)*sign(logFC))
geneList = gene.df[,"rank_metric"]
names(geneList)= gene.df$ENSEMBL
geneList = sort(geneList, decreasing = TRUE)
GSE_BP <- gseGO(geneList = geneList,
OrgDb = org.Mm.eg.db,
ont = GO_type,
minGSSize = 100,
maxGSSize = 500,
pvalueCutoff = 0.05,
verbose = FALSE,
keyType = "ENSEMBL")
GSE_BP <- setReadable(GSE_BP,OrgDb = org.Mm.eg.db,keyType = "ENSEMBL")
GSE_BP <- simplify(GSE_BP)
return(GSE_BP)}
GSE_FvC <- GSE_function(diet_res$fat_v_chow,"BP")
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(results_table$unique.gene.name, fromType = "SYMBOL", toType =
## c("ENSEMBL", : 5.53% of input gene IDs are fail to map...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
GSE_SvC <- GSE_function(diet_res$starch_v_chow,"BP")
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(results_table$unique.gene.name, fromType = "SYMBOL", toType =
## c("ENSEMBL", : 5.53% of input gene IDs are fail to map...
## Warning in bitr(results_table$unique.gene.name, fromType = "SYMBOL", toType =
## c("ENSEMBL", : There are duplicate gene names, fgsea may produce unexpected
## results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
GSE_FvS <- GSE_function(diet_res$fat_v_starch,"BP")
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(results_table$unique.gene.name, fromType = "SYMBOL", toType =
## c("ENSEMBL", : 5.53% of input gene IDs are fail to map...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
GSE_barplot_function <- function(sheet,colors){
graph <- slice_min(data.frame(sheet),order_by = pvalue,n=8) %>% mutate(Description = fct_reorder(Description, -pvalue)) %>% mutate(Direction = if_else(enrichmentScore>0,"Positive","Negative")) %>%
ggplot(aes(x=-log10(p.adjust),fill=Direction,y=Description))+geom_col()+scale_fill_manual(values = colors)+theme_classic()+scale_y_discrete(position="right")+theme(axis.line.y =element_blank())
return(graph)
}
GSE_barplot_function(GSE_FvC,c(diet_colors[1],diet_colors[3]))/GSE_barplot_function(GSE_SvC,c(diet_colors[1],diet_colors[2]))/GSE_barplot_function(GSE_FvS,c(diet_colors[2],diet_colors[3]))
Heatmap for chow specific proteins
chow_matrix <- imputed_sumExp_4_weeks_no_outliers[chow_proteins$proteins,]
library(pheatmap)
chow_proteins <- mutate(chow_proteins,direction = if_else(logFC>0,"up","down"))
heatmap_function <- function(sumexpobj){
col_annotations_df <- data.frame(diet = sumexpobj$Diet)
rownames(col_annotations_df) <- colnames(assay(sumexpobj))
col_annotations_df <- arrange(col_annotations_df,by=diet)
matrix <- sumexpobj[,rownames(col_annotations_df)]
row_annotations_df <- data.frame(unique.gene.name = matrix@elementMetadata$unique.gene.name, proteins = matrix@elementMetadata$proteins) %>%
left_join(chow_proteins[,c("proteins","direction")],by="proteins") %>%
column_to_rownames(var="proteins")
row_annotations_df <- arrange(row_annotations_df,direction,unique.gene.name)
matrix <- matrix[rownames(row_annotations_df),]
row_names <- row_annotations_df$unique.gene.name
row_annotations_df <- select(row_annotations_df,-unique.gene.name)
colors = list(diet = c(chow = diet_colors[1],starch = diet_colors[2], fat = diet_colors[3]), direction = c(up = "#DBCDF0",down = "#FAEDCB"))
graph <- pheatmap(mat = assay(matrix),
scale = "row",
annotation_col = col_annotations_df,
cluster_cols = F,
breaks= seq(-2, 2, 0.1),
gaps_col = (c(13,27)),
gaps_row = c(89),
color = colorRampPalette(c("blue", "white", "red"))(40),
labels_row = row_names,
cluster_rows = F,
show_colnames = F,
treeheight_row = 0,
fontsize_row = 6,
annotation_row = row_annotations_df,
annotation_colors = colors,
border_color = "grey")
return(graph)}
heatmap_function(chow_matrix)
Filter missing values and determine samples missing at random (MAR) and not missing at random
missing_tidy <- as_tibble(assay(sumExp), rownames = "proteins") %>% pivot_longer(cols=-"proteins",names_to = "Animal_ID",values_to = "LogIntensity") %>% inner_join(lipogenesis_metadata,by="Animal_ID")
Missing_table_subgroup <- missing_tidy %>%
dplyr::group_by(sub_group,proteins) %>%
summarise(sum_not_na = sum(!is.na(LogIntensity))) %>%
pivot_wider(names_from = sub_group,values_from = sum_not_na)
## `summarise()` has grouped output by 'sub_group'. You can override using the
## `.groups` argument.
Missing_table <- Missing_table_subgroup %>% mutate(Keep=if_else(chow_four_weeks >= 8 & chow_thirty_weeks >= 6 & fat_four_weeks >= 8 & fat_thirty_weeks >= 6 & starch_four_weeks >= 8 & starch_thirty_weeks >= 6,"YES","NO"))
table(Missing_table$Keep)
##
## NO YES
## 2409 3110
protein_table <- full_join(rownames_to_column(protein_info,"proteins"),Missing_table,by = "proteins")
filtered_sumExp <- SummarizedExperiment(assays= assay(sumExp), colData=sumExp@colData, rowData=protein_table)
filtered_sumExp <- filtered_sumExp[filtered_sumExp@elementMetadata$Keep == "YES",]
boxplot(assay(filtered_sumExp))
Normalisation
filtered_sumExp <- QFeatures::normalize(filtered_sumExp,method="diff.median")
boxplot(assay(filtered_sumExp))
mds_function <- function(exp_obj){
m <- assay(exp_obj)
exp <- exp_obj
mds <- plotMDS(m, plot = FALSE,top = 1000)
mds_df <- data.frame(Animal_ID = colnames(mds$distance.matrix.squared),X = mds$x, Y = mds$y,Dim1 = mds$eigen.vectors[,1], Dim2 = mds$eigen.vectors[,2], Dim3 = mds$eigen.vectors[,3],
Diet=exp@colData$Diet, timepoint = exp@colData$time_on_diet)
MDS_plotter <- function(assay_data,colored_by){
df1 <- na.omit(assay_data)
graph1 <- ggplot(df1, aes_string(x="Dim1", y="Dim2", color=colored_by, text = "Animal_ID")) + geom_point(size=3)
graph2 <- ggplot(df1, aes_string(x="Dim1", y="Dim3", color=colored_by, text = "Animal_ID")) + geom_point(size=3)
graph3 <- ggplot(df1, aes_string(x="Dim2", y="Dim3", color=colored_by, text = "Animal_ID")) + geom_point(size=3)
return(graph1+graph2+graph3+plot_layout(guides = 'collect'))}
MDS1 <- MDS_plotter(mds_df,"Diet")
MDS2 <- MDS_plotter(mds_df,"timepoint")
graphs <- MDS1/MDS2
return(graphs)}
mds_function(filtered_sumExp)
Outliers on MDS (208321 is an outlier for lipogenesis measurement)
outliers_4_week <- c("20842","21167","20833","20821","21182","20831")
filtered_sumExp_no_outliers <- filtered_sumExp[,!colnames(filtered_sumExp) %in% outliers_4_week]
mds_function(filtered_sumExp_no_outliers)
Making a tidy table
Meta_tidy <- as.data.frame(filtered_sumExp_no_outliers@colData) %>% rownames_to_column(var = "Animal_ID")
protein_tibble <- as_tibble(assay(filtered_sumExp_no_outliers), rownames = "proteins") %>% left_join(protein_table[,1:3],by="proteins")
tidy_protein_table <- pivot_longer(protein_tibble,cols= -c(proteins,Gene.names,unique.gene.name),names_to = "Animal_ID",values_to = "LogIntensity")
tidy_protein_table <- inner_join(Meta_tidy,tidy_protein_table,by="Animal_ID")
tidy_protein_table$Diet <- factor(tidy_protein_table$Diet, levels = c("chow","starch","fat"))
tidy_protein_table$time_on_diet <- factor(tidy_protein_table$time_on_diet, levels = c("four_weeks","thirty_weeks"))
limma testing for the interaction between time and diet and diet
limma_time_diet_int <- function(exp_obj){
diet <- factor(exp_obj@colData$Diet)
timepoint <- factor(exp_obj@colData$time_on_diet)
group <- factor(exp_obj@colData$sub_group)
model <- model.matrix(~0+group)
is.fullrank(model)
head(model)
colnames(model) <- sub(pattern = "group",replacement = "",x = colnames(model))
contrasts <- makeContrasts(fat_v_chow_v_time = ((fat_four_weeks - chow_four_weeks)-(fat_thirty_weeks - chow_thirty_weeks)),
starch_v_chow_v_time = ((starch_four_weeks - chow_four_weeks)-(starch_thirty_weeks - chow_thirty_weeks)),
fat_v_starch_v_time = ((fat_four_weeks - starch_four_weeks)-(fat_thirty_weeks - starch_thirty_weeks)),
fat_v_chow_4_weeks = fat_four_weeks-chow_four_weeks,
starch_v_chow_4_weeks = starch_four_weeks-chow_four_weeks,
fat_v_starch_4_weeks = fat_four_weeks - starch_four_weeks,
fat_v_chow_30_weeks = fat_thirty_weeks-chow_thirty_weeks,
starch_v_chow_30_weeks = starch_thirty_weeks-chow_thirty_weeks,
fat_v_starch_30_weeks = fat_thirty_weeks - starch_thirty_weeks,
age_effect = ((chow_thirty_weeks+fat_thirty_weeks+starch_thirty_weeks)/3 - (chow_four_weeks+fat_four_weeks+starch_four_weeks)/3)
,levels = model)
fit <- lmFit(assay(exp_obj), model)
plotSA(fit)
diet_res <- lapply(colnames(contrasts), function(cont){
fitCont <- contrasts.fit(fit, contrasts = contrasts[,cont])
fitCont <- eBayes(fitCont,trend = T,robust = T)
plotSA(fitCont)
dat <- topTable(fitCont, number = Inf, sort.by = "none")
left_join(rownames_to_column(dat,var="proteins"),protein_table[,1:3],by="proteins")
})
names(diet_res) <- colnames(contrasts)
return(diet_res)}
diet_time_int_res <- limma_time_diet_int(filtered_sumExp_no_outliers)
hist(diet_time_int_res$fat_v_chow_v_time$P.Value)
count(diet_time_int_res$fat_v_chow_v_time$adj.P.Val < 0.05,na.rm = T)
## [1] 6
hist(diet_time_int_res$starch_v_chow_v_time$P.Value)
count(diet_time_int_res$starch_v_chow_v_time$adj.P.Val < 0.05,na.rm = T)
## [1] 1
hist(diet_time_int_res$fat_v_starch_v_time$P.Value)
count(diet_time_int_res$fat_v_starch_v_time$adj.P.Val < 0.05,na.rm = T)
## [1] 0
hist(diet_time_int_res$age_effect$P.Value)
count(diet_time_int_res$age_effect$adj.P.Val < 0.05,na.rm = T)
## [1] 1344
Plotting the interactions between time on diet and diet
interaction_plot <- function(sheet_4_w,sheet_30_w,int_sheet){
merged_sheet <- inner_join(sheet_4_w,sheet_30_w, by = "proteins") %>% inner_join(int_sheet[,c(1,5,6)])
colnames(merged_sheet) <- gsub(".x","_4_weeks",colnames(merged_sheet))
colnames(merged_sheet) <- gsub(".y","_30_weeks",colnames(merged_sheet))
rt <- merged_sheet %>% mutate(sig = if_else(adj.P.Val < 0.05,"Sig","Non_Sig")) %>% filter(adj.P.Val_4_weeks<0.05 | adj.P.Val_30_weeks < 0.05)
graph <- ggplot(rt,aes(x=logFC_4_weeks,y=logFC_30_weeks,text=unique.gene.name_4_weeks,color=sig,alpha=sig))+geom_point()+geom_abline(intercept=0, slope=1)+
geom_text_repel(
data=rt %>% filter(sig == "Sig"),
aes(label=unique.gene.name_4_weeks),show.legend = F,color="red",position = position_nudge(y = 0.2))+
scale_color_manual(values = c("black","red"))+
theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", fill=NA, linewidth=1.5))+
scale_x_continuous(limits=c(-3,3))+
scale_y_continuous(limits=c(-3,3))+
annotate(geom = "text",x = -2,y = 2,label = paste("R = ",round(cor(rt$logFC_4_weeks,rt$logFC_30_weeks),digits = 2),"\n N =", length(rt$proteins),sep = ""))
return(graph)
}
fat_chow_int_plot <- interaction_plot(diet_time_int_res$fat_v_chow_4_weeks,diet_time_int_res$fat_v_chow_30_weeks,diet_time_int_res$fat_v_chow_v_time)
## Joining with `by = join_by(proteins)`
starch_chow_int_plot <- interaction_plot(diet_time_int_res$starch_v_chow_4_weeks,diet_time_int_res$starch_v_chow_30_weeks,diet_time_int_res$starch_v_chow_v_time)
## Joining with `by = join_by(proteins)`
fat_starch_int_plot <- interaction_plot(diet_time_int_res$fat_v_starch_4_weeks,diet_time_int_res$fat_v_starch_30_weeks,diet_time_int_res$fat_v_starch_v_time)
## Joining with `by = join_by(proteins)`
fat_chow_int_plot/starch_chow_int_plot/fat_starch_int_plot
## Warning: Using alpha for a discrete variable is not advised.
## Using alpha for a discrete variable is not advised.
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Using alpha for a discrete variable is not advised.
Boxplot of sig diet x time int
boxplot_gene_name <- function(Gene_names,ncols){
tidy_protein_table$time_on_diet <- factor(tidy_protein_table$time_on_diet)
graph <- filter(tidy_protein_table, unique.gene.name %in% Gene_names) %>% ggplot(aes(x = time_on_diet , y = LogIntensity, fill = Diet)) +
geom_boxplot(color = "black",alpha=0.5) +
geom_point(aes(color=Diet,x=time_on_diet),position = position_dodge(width=0.75)) + facet_wrap(~unique.gene.name, scales ="free_y",ncol = ncols)+
scale_fill_manual(values=diet_colors) +
scale_color_manual(values=diet_colors) +
scale_x_discrete(labels=c("four_weeks" = "4", "thirty_weeks" = "30")) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=1))
return(graph)}
diet_time_int <- c("Scpep1","Amacr","Trip12","Gstp1","Cyp4a12b","Gstm2","Cyp4a10")
boxplot_gene_name(diet_time_int,2)
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
Making a z scored data frame to plot averages between protein groups
z_scored_m <- t(scale(t(assay(filtered_sumExp_no_outliers))))
z_tidy <- as.data.frame(filtered_sumExp_no_outliers@colData) %>% rownames_to_column(var = "Animal_ID")
z_tibble <- as_tibble(z_scored_m, rownames = "proteins") %>% left_join(protein_table[,1:3],by="proteins")
tidy_z <- pivot_longer(z_tibble,cols= -c(proteins,Gene.names,unique.gene.name),names_to = "Animal_ID",values_to = "LogIntensity")
tidy_z <- inner_join(z_tidy,tidy_z,by="Animal_ID")
tidy_z$Diet <- factor(tidy_z$Diet, levels = c("chow","starch","fat"))
tidy_z$time_on_diet <- factor(tidy_z$time_on_diet, levels = c("four_weeks","thirty_weeks"))
summary_z <- summarySE(tidy_z, measurevar="LogIntensity", groupvars=c("Animal_ID","Diet","time_on_diet"),na.rm = T)
# Plotting protein summaries
Summary_box_plotter <- function(protein_filter_sheet){
graph <- tidy_z %>% filter(proteins %in% protein_filter_sheet$proteins) %>%
summarySE(measurevar="LogIntensity", groupvars=c("Animal_ID","Diet","time_on_diet"),na.rm = T) %>%
ggplot(aes(x=time_on_diet,y=LogIntensity,fill=Diet)) +
geom_boxplot(color = "black",alpha=0.5) +
geom_point(aes(color=Diet,x=time_on_diet),position = position_dodge(width=0.75)) +
scale_fill_manual(values = diet_colors) +
scale_color_manual(values = diet_colors) +
scale_y_continuous(limits = c(-1,1)) +
ylab(label = "LogIntensity (Mean Centered)") +
xlab("Weeks on Diet") +
scale_x_discrete(labels=c("four_weeks" = "4", "thirty_weeks" = "30")) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=1))
return(graph)}
(Summary_box_plotter(lipo_res_positive)/Summary_box_plotter(trig_res_positive)/Summary_box_plotter(chow_proteins)/Summary_box_plotter(diet_time_int_res$fat_v_chow_v_time)) + plot_layout(guides = "collect")
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
2-way ANOVA stats for protein groups
protein_group_2_wayANOVA <- function(protein_filter_sheet){
filt <- tidy_z %>% filter(proteins %in% protein_filter_sheet$proteins) %>%
summarySE(measurevar="LogIntensity", groupvars=c("Animal_ID","Diet","time_on_diet"),na.rm = T)
filt$Diet <- factor(filt$Diet)
filt$time_on_diet <- factor(filt$time_on_diet)
res <- aov(LogIntensity ~ Diet * time_on_diet, data = filt)
return(res)}
lipo_res_ANOVA <- protein_group_2_wayANOVA(lipo_res_positive)
summary(lipo_res_ANOVA)
## Df Sum Sq Mean Sq F value Pr(>F)
## Diet 2 10.079 5.040 27.050 6.02e-09 ***
## time_on_diet 1 0.530 0.530 2.842 0.0974 .
## Diet:time_on_diet 2 1.615 0.808 4.335 0.0178 *
## Residuals 56 10.433 0.186
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
trig_res_ANOVA <- protein_group_2_wayANOVA(trig_res_positive)
summary(trig_res_ANOVA)
## Df Sum Sq Mean Sq F value Pr(>F)
## Diet 2 12.062 6.031 180.362 < 2e-16 ***
## time_on_diet 1 0.442 0.442 13.207 0.000607 ***
## Diet:time_on_diet 2 0.211 0.106 3.156 0.050254 .
## Residuals 56 1.873 0.033
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
chow_proteins_res_ANOVA <- protein_group_2_wayANOVA(chow_proteins)
summary(chow_proteins_res_ANOVA)
## Df Sum Sq Mean Sq F value Pr(>F)
## Diet 2 5.081 2.5403 102.230 < 2e-16 ***
## time_on_diet 1 0.409 0.4094 16.476 0.000154 ***
## Diet:time_on_diet 2 0.013 0.0065 0.262 0.770538
## Residuals 56 1.392 0.0248
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
all_proteins_res_ANOVA <- protein_group_2_wayANOVA(diet_time_int_res$fat_v_chow_v_time)
summary(all_proteins_res_ANOVA)
## Df Sum Sq Mean Sq F value Pr(>F)
## Diet 2 0.1138 0.05690 4.110 0.021593 *
## time_on_diet 1 0.1804 0.18040 13.033 0.000654 ***
## Diet:time_on_diet 2 0.0477 0.02383 1.722 0.188046
## Residuals 56 0.7751 0.01384
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1